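'''
Incremental crawl: every URL that is fetched successfully is recorded in the
database (Redis); on later runs, URLs that are already recorded are skipped.
'''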

import requests
import random
import redis


def should_request(url):
    """Report whether ``url`` is already stored in the Redis request history.

    NOTE: despite the name, a true result means the URL is *already present*
    in the ``requested_urls`` list; ``main`` issues a request only when this
    function returns a false value.

    Args:
        url: URL string to look up.

    Returns:
        True if the encoded URL appears in the ``requested_urls`` list,
        False otherwise.
    """
    client = redis.Redis(host='localhost', port=6379, db=0)
    try:
        # LRANGE 0 -1 fetches every entry currently in the list.
        urls = client.lrange('requested_urls', 0, -1)
    finally:
        # Release the connection on every path, matching add_redis.
        client.close()
    # The URL is encoded so it is compared against the stored entries as bytes.
    return url.encode() in urls


def add_redis(url):
    """Record ``url`` as requested by pushing it onto the Redis list.

    Args:
        url: URL to prepend to the ``requested_urls`` list.
    """
    client = redis.Redis(host='localhost', port=6379, db=0)
    try:
        client.lpush('requested_urls', url)
    finally:
        # Close the client even when the push raises, so it is not leaked.
        client.close()


def main():
    """Fetch a random batch of httpbin URLs, skipping ones already recorded.

    Builds up to 100 distinct URLs whose ``a`` query value is a random integer
    in 1..100. For each URL not yet recorded in Redis, the URL is requested
    and the response is printed; the URL is recorded only when the response
    status indicates success, so failed URLs are retried on a later run.
    A request that raises (timeout, connection error, ...) is reported and
    skipped instead of aborting the remaining URLs.
    """
    # The set comprehension drops duplicate URLs generated within this run.
    urls = {f'http://httpbin.org/get?a={random.randint(1, 100)}' for _ in range(100)}
    for url in urls:
        # should_request() returns True when the URL is already recorded.
        if should_request(url):
            print(f"重复{url}")
            continue
        try:
            # Without a timeout a stalled server would block the crawl forever.
            res = requests.get(url, timeout=10)
        except requests.RequestException as exc:
            # Message reads "request failed"; kept in the language of the
            # script's existing output.
            print(f"请求失败{url}: {exc}")
            continue
        print(res)
        # Only successful fetches are recorded; error responses stay eligible
        # for the next run.
        if res.ok:
            add_redis(url)


# Run the crawl when this file is executed.
# NOTE(review): this call is unguarded, so it also runs whenever the module is
# imported — consider an `if __name__ == "__main__":` guard if that is unintended.
main()
